
source('utils.R')

#' Build and write the analysis table for one cycle.
#'
#' Opens the dataset at `final_long/cycle={cycle}`, keeps rows where
#' `in_cl == 1`, `in_l2 == 1` and `total > 0`, derives the `r_*` / `d_*`
#' indicator and value columns, adds several `make_bins()` groupings of
#' `total`, and writes the result to `df{YY}.parquet`, where `YY` is
#' characters 3-4 of `cycle`.
#'
#' `make_bins()` is not defined in this file (presumably it comes from
#' `utils.R`); its second argument looks like a vector of quantile
#' probabilities -- TODO confirm.
#'
#' @param cycle Cycle year, e.g. `2012L`. Used in the input path and in the
#'   output file name.
#' @return The value of `gc()`. The function is called for its side effect of
#'   writing a parquet file.
make_analysis_df <- function(cycle){

  this <- open_dataset(glue('final_long/cycle={cycle}')) %>%
    filter(in_cl == 1, in_l2 == 1, total > 0) %>%
    mutate(
      # NOTE(review): `cycle` is not listed in the select() below, so this
      # column never reaches the output file -- confirm whether it should.
      cycle = as.integer(cycle),
      # Indicators: 1 when any of the pcc / sp / jfc amounts is positive.
      r_ind = if_else(r_pcc > 0 | r_sp > 0 | r_jfc > 0, 1, 0, missing = 0),
      d_ind = if_else(d_pcc > 0 | d_sp > 0 | d_jfc > 0, 1, 0, missing = 0),
      r_ind_pcc_only = if_else(r_pcc > 0, 1, 0, missing = 0),
      d_ind_pcc_only = if_else(d_pcc > 0, 1, 0, missing = 0),
      # Values: the summed amount when positive, otherwise 0.
      # NOTE(review): if any one component is NA the sum is NA and the value
      # falls back to 0 via `missing = 0`, even when the matching indicator
      # above is 1 -- confirm the inputs contain no NAs.
      r_val = if_else(
        r_pcc + r_sp + r_jfc > 0, r_pcc + r_sp + r_jfc, 0, missing = 0
      ),
      d_val = if_else(
        d_pcc + d_sp + d_jfc > 0, d_pcc + d_sp + d_jfc, 0, missing = 0
      ),
      # "pcc only" values use the pcc column alone, matching
      # `*_ind_pcc_only` and `d_val_pcc_only`. The previous version added
      # `r_sp` to the r_ value only.
      r_val_pcc_only = if_else(r_pcc > 0, r_pcc, 0, missing = 0),
      d_val_pcc_only = if_else(d_pcc > 0, d_pcc, 0, missing = 0)
    ) %>%
    select(
      component, total, resid_state,
      r_ind, d_ind, r_ind_pcc_only, d_ind_pcc_only,
      r_val, d_val, r_val_pcc_only, d_val_pcc_only,
      education, gender, ethnicity, tax
    ) %>%
    collect() %>%
    as.data.frame()

  # Convert in place so the `:=` updates below modify `this` by reference.
  setDT(this)

  # Bins of `total`: over all rows, and separately within each resid_state.
  this[, nat_quant := make_bins(total)]
  this[, st_quant := make_bins(total), by = resid_state]

  # Same variable binned on a 0.001-step grid and on a 0.1-step grid.
  this[, nat_quant_fine := make_bins(total, seq(0, 1, 0.001))]
  this[, nat_dec := make_bins(total, seq(0, 1, .1))]

  write_parquet(this, glue('df{substr(cycle,3,4)}.parquet'))

  # Drop the table and collect garbage before the next cycle is processed.
  rm(this); gc()
}

# Write df12.parquet, df16.parquet and df20.parquet, one cycle at a time.
invisible(lapply(c(2012L, 2016L, 2020L), make_analysis_df))

#' Build and write the "full" analysis table for one cycle.
#'
#' Opens the dataset at `final_long/cycle={cycle}`, keeps every row with
#' `in_l2 == 1`, derives the `r_*` / `d_*` indicator and value columns (with
#' and without the jfc component), and writes the result to
#' `df{YY}_full.parquet`, where `YY` is characters 3-4 of `cycle`. The
#' `make_bins()` groupings are assigned only on rows with `in_cl == 1` and
#' `total > 0`; all other rows are left NA in those columns.
#'
#' @param cycle Cycle year, e.g. `2012L`. Used in the input path and in the
#'   output file name.
#' @return The value of `gc()`. The function is called for its side effect of
#'   writing a parquet file.
make_full_analysis_df <- function(cycle){
  cycle_ds <- open_dataset(glue('final_long/cycle={cycle}'))

  with_flags <- cycle_ds %>%
    filter(in_l2 == 1) %>%
    mutate(
      cycle = as.integer(cycle),
      # 1 when any listed amount is positive, otherwise 0.
      r_ind = if_else(
        r_pcc > 0 | r_sp > 0 | r_jfc > 0, 1, 0, missing = 0
      ),
      d_ind = if_else(
        d_pcc > 0 | d_sp > 0 | d_jfc > 0, 1, 0, missing = 0
      ),
      r_ind_no_jfc = if_else(r_pcc > 0 | r_sp > 0, 1, 0, missing = 0),
      d_ind_no_jfc = if_else(d_pcc > 0 | d_sp > 0, 1, 0, missing = 0),
      # Summed amount when positive, otherwise 0.
      r_val = if_else(
        r_pcc + r_sp + r_jfc > 0, r_pcc + r_sp + r_jfc, 0, missing = 0
      ),
      d_val = if_else(
        d_pcc + d_sp + d_jfc > 0, d_pcc + d_sp + d_jfc, 0, missing = 0
      ),
      r_val_no_jfc = if_else(
        r_pcc + r_sp > 0, r_pcc + r_sp, 0, missing = 0
      ),
      d_val_no_jfc = if_else(
        d_pcc + d_sp > 0, d_pcc + d_sp, 0, missing = 0
      )
    )

  full_df <- with_flags %>%
    select(
      component, total, resid_state,
      r_ind, d_ind, r_ind_no_jfc, d_ind_no_jfc,
      r_val, d_val, r_val_no_jfc, d_val_no_jfc,
      education, gender, ethnicity, age20, tax, income_est, net_worth, in_cl
    ) %>%
    collect() %>%
    as.data.frame()

  # Convert in place so the `:=` updates below modify `full_df` by reference.
  setDT(full_df)

  # Bins are computed only over rows with in_cl == 1 and total > 0:
  # over all such rows, within each resid_state, then on a 0.001-step grid.
  full_df[
    in_cl == 1 & total > 0, nat_quant := make_bins(total)
  ][
    in_cl == 1 & total > 0, st_quant := make_bins(total), by = resid_state
  ][
    in_cl == 1 & total > 0, nat_quant_fine := make_bins(total, seq(0,1,0.001))
  ]

  write_parquet(full_df, glue('df{substr(cycle,3,4)}_full.parquet'))

  # Drop the table and collect garbage before the next cycle is processed.
  rm(full_df)
  gc()
}

# Write df12_full.parquet, df16_full.parquet and df20_full.parquet in turn.
invisible(lapply(c(2012L, 2016L, 2020L), make_full_analysis_df))
